{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Dates in timeseries models"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import statsmodels.api as sm\n",
    "import numpy as np\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Getting started"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\statsmodels\\datasets\\utils.py:337: FutureWarning: load will return datasets containing pandas DataFrames and Series in the Future.  To suppress this message, specify as_pandas=False\n",
      "  FutureWarning)\n"
     ]
    }
   ],
   "source": [
    "data = sm.datasets.sunspots.load()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Right now an annual date series must be datetimes at the end of the year."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "from datetime import datetime\n",
    "dates = sm.tsa.datetools.dates_from_range('1700', length=len(data.endog))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Using Pandas\n",
    "\n",
    "Make a pandas TimeSeries or DataFrame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "endog = pd.Series(data.endog, index=dates)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Instantiate the model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "ar_model = sm.tsa.AR(endog, freq='A')\n",
    "pandas_ar_res = ar_model.fit(maxlag=9, method='mle', disp=-1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Out-of-sample prediction"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "2005-12-31    20.003286\n",
      "2006-12-31    24.703981\n",
      "2007-12-31    20.026127\n",
      "2008-12-31    23.473636\n",
      "2009-12-31    30.858573\n",
      "2010-12-31    61.335449\n",
      "2011-12-31    87.024688\n",
      "2012-12-31    91.321252\n",
      "2013-12-31    79.921628\n",
      "2014-12-31    60.799529\n",
      "2015-12-31    40.374890\n",
      "Freq: A-DEC, dtype: float64\n"
     ]
    }
   ],
   "source": [
    "pred = pandas_ar_res.predict(start='2005', end='2015')\n",
    "print(pred)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Using explicit dates"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[20.003286   24.70398105 20.026127   23.47363632 30.85857305 61.33544859\n",
      " 87.02468802 91.3212518  79.9216276  60.79952934 40.37488956]\n"
     ]
    }
   ],
   "source": [
    "ar_model = sm.tsa.AR(data.endog, dates=dates, freq='A')\n",
    "ar_res = ar_model.fit(maxlag=9, method='mle', disp=-1)\n",
    "pred = ar_res.predict(start='2005', end='2015')\n",
    "print(pred)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "This just returns a regular array, but since the model has date information attached, you can get the prediction dates in a roundabout way."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "DatetimeIndex(['2005-12-31', '2006-12-31', '2007-12-31', '2008-12-31',\n",
      "               '2009-12-31', '2010-12-31', '2011-12-31', '2012-12-31',\n",
      "               '2013-12-31', '2014-12-31', '2015-12-31'],\n",
      "              dtype='datetime64[ns]', freq='A-DEC')\n"
     ]
    }
   ],
   "source": [
    "print(ar_res.data.predict_dates)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Note: This attribute only exists if predict has been called. It holds the dates associated with the last call to predict."
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
